#!/usr/bin/perl
use strict;

my $ssiz=7000;			# sample size

if ($ARGV[0] =~ /^-[h?]/) {
	print "Usage: determine-phred FILE

Reads a sam, fastq or pileup, possibly gzipped and returns the phred-scale, 
  either 64 or 33, based on a quick scan of the data in the file.
";
	exit 0;
}
my $cnt;
my $dphred = 64;
if ($ARGV[0] =~ /\.gz$/) {
	$ARGV[0] = "gunzip -c '$ARGV[0]'|";
}
my $qual;
my $comm;
my $fmt;
if (@ARGV > 1) {
	my @mult = @ARGV;
	for my $f (@mult) {
		@ARGV = ($f);
		determine();
		print "$f\t$dphred\n";
	}
} else {
	determine();
	print "$dphred\n";
}

sub determine {
	$_ = <>;
	if (/^\@/ && ! /^\@SQ\t/) {
		# fastq
		scalar <>;              # read
		$comm = scalar <>;      # comment
		if (!(substr($comm,0,1) eq '+')) {
			die "Unknown file format\n";
		}
		$qual = <>;
		chomp $qual;
		$fmt = 'fq';
	} elsif (/^\S+\t\d+\t[ACTGN]\t\d+\t\S+\t(\S+)$/i) {
		$qual = $1;
		$fmt = 'pileup';
	} else {
		# sam
		$fmt = 'sam';
		$qual = (split(/\t/, $_))[10];
	}
	if (!$qual) {
		die "Unknown file format\n";
	}
    my $rc = 1;
	while($qual) {
        ++$rc;
		for (my $i =length($qual)/2; $i < length($qual); ++$i) {
			if (ord(substr($qual,$i,1)) < 64) {
				$dphred = 33;
				$cnt=$ssiz;	# last
				last;
			}
		}
		$qual = '';
		last if ++$cnt >= $ssiz;	# got enough
		if ($fmt eq 'fq') {
			# fastq
			last if ! scalar <>;		# id
			last if ! scalar <>;		# read
			last if ! scalar <>;		# comment
			$qual = <>;
			chomp $qual;
		} elsif ($fmt eq 'pileup') {
			$qual = (split(/\t/, $_))[5];
		} else {
			# sam
			$qual = (split(/\t/, $_))[10];
		}
	}
    if ($rc < 10) {
		$dphred = 33;
    }
}
